In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
In [2]:
df = pd.read_csv("dataset.csv")
df.head()
Out[2]:
VIN (1-10) County City State Postal Code Model Year Make Model Electric Vehicle Type Clean Alternative Fuel Vehicle (CAFV) Eligibility Electric Range Base MSRP Legislative District DOL Vehicle ID Vehicle Location Electric Utility 2020 Census Tract
0 JTMEB3FV6N Monroe Key West FL 33040 2022 TOYOTA RAV4 PRIME Plug-in Hybrid Electric Vehicle (PHEV) Clean Alternative Fuel Vehicle Eligible 42 0 NaN 198968248 POINT (-81.80023 24.5545) NaN 12087972100
1 1G1RD6E45D Clark Laughlin NV 89029 2013 CHEVROLET VOLT Plug-in Hybrid Electric Vehicle (PHEV) Clean Alternative Fuel Vehicle Eligible 38 0 NaN 5204412 POINT (-114.57245 35.16815) NaN 32003005702
2 JN1AZ0CP8B Yakima Yakima WA 98901 2011 NISSAN LEAF Battery Electric Vehicle (BEV) Clean Alternative Fuel Vehicle Eligible 73 0 15.0 218972519 POINT (-120.50721 46.60448) PACIFICORP 53077001602
3 1G1FW6S08H Skagit Concrete WA 98237 2017 CHEVROLET BOLT EV Battery Electric Vehicle (BEV) Clean Alternative Fuel Vehicle Eligible 238 0 39.0 186750406 POINT (-121.7515 48.53892) PUGET SOUND ENERGY INC 53057951101
4 3FA6P0SU1K Snohomish Everett WA 98201 2019 FORD FUSION Plug-in Hybrid Electric Vehicle (PHEV) Not eligible due to low battery range 26 0 38.0 2006714 POINT (-122.20596 47.97659) PUGET SOUND ENERGY INC 53061041500
In [ ]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 112634 entries, 0 to 112633
Data columns (total 17 columns):
 #   Column                                             Non-Null Count   Dtype  
---  ------                                             --------------   -----  
 0   VIN (1-10)                                         112634 non-null  object 
 1   County                                             112634 non-null  object 
 2   City                                               112634 non-null  object 
 3   State                                              112634 non-null  object 
 4   Postal Code                                        112634 non-null  int64  
 5   Model Year                                         112634 non-null  int64  
 6   Make                                               112634 non-null  object 
 7   Model                                              112614 non-null  object 
 8   Electric Vehicle Type                              112634 non-null  object 
 9   Clean Alternative Fuel Vehicle (CAFV) Eligibility  112634 non-null  object 
 10  Electric Range                                     112634 non-null  int64  
 11  Base MSRP                                          112634 non-null  int64  
 12  Legislative District                               112348 non-null  float64
 13  DOL Vehicle ID                                     112634 non-null  int64  
 14  Vehicle Location                                   112610 non-null  object 
 15  Electric Utility                                   112191 non-null  object 
 16  2020 Census Tract                                  112634 non-null  int64  
dtypes: float64(1), int64(6), object(10)
memory usage: 14.6+ MB
In [ ]:
df.describe()
Out[ ]:
Postal Code Model Year Electric Range Base MSRP Legislative District DOL Vehicle ID 2020 Census Tract
count 112634.000000 112634.000000 112634.000000 112634.000000 112348.000000 1.126340e+05 1.126340e+05
mean 98156.226850 2019.003365 87.812987 1793.439681 29.805604 1.994567e+08 5.296650e+10
std 2648.733064 2.892364 102.334216 10783.753486 14.700545 9.398427e+07 1.699104e+09
min 1730.000000 1997.000000 0.000000 0.000000 1.000000 4.777000e+03 1.101001e+09
25% 98052.000000 2017.000000 0.000000 0.000000 18.000000 1.484142e+08 5.303301e+10
50% 98119.000000 2020.000000 32.000000 0.000000 34.000000 1.923896e+08 5.303303e+10
75% 98370.000000 2022.000000 208.000000 0.000000 43.000000 2.191899e+08 5.305307e+10
max 99701.000000 2023.000000 337.000000 845000.000000 49.000000 4.792548e+08 5.603300e+10
In [ ]:
df.isnull().sum()
Out[ ]:
VIN (1-10)                                             0
County                                                 0
City                                                   0
State                                                  0
Postal Code                                            0
Model Year                                             0
Make                                                   0
Model                                                 20
Electric Vehicle Type                                  0
Clean Alternative Fuel Vehicle (CAFV) Eligibility      0
Electric Range                                         0
Base MSRP                                              0
Legislative District                                 286
DOL Vehicle ID                                         0
Vehicle Location                                      24
Electric Utility                                     443
2020 Census Tract                                      0
dtype: int64
In [3]:
Data = df.dropna()
In [4]:
Data.duplicated().sum()
Out[4]:
0
In [ ]:
 
In [ ]:
 

Task 1:Apply Exploratory Data Analysis (Univariate and Bivariate) on the dataset available above.

Exploratory Data Analysis (Electric Vehicles)

Univariate Analysis

In [ ]:
# Distribution of Electronic Vehicle Types
sns.countplot(data = Data, y = "Electric Vehicle Type")
plt.show()
No description has been provided for this image
In [ ]:
# Top 5 Electronic Vechile Brands
Top_5 = Data["Make"].value_counts().head(5)
sns.barplot(x = Top_5.values, y = Top_5.index)
plt.show()
No description has been provided for this image
In [ ]:
# Distribution of Model Year
sns.histplot(Data["Model Year"], bins = 20, kde = True)
plt.show
Out[ ]:
<function matplotlib.pyplot.show(close=None, block=None)>
No description has been provided for this image
In [ ]:
 

Bivariate Analysis

In [ ]:
sns.scatterplot(data = Data, x = "Electric Range" , y = "Base MSRP" , hue = "Electric Vehicle Type")
plt.show()
No description has been provided for this image
In [ ]:
plt.figure(figsize = (12, 6))
sns.boxplot(data = Data, x = "Make", y = "Electric Range")
plt.xticks(rotation=90)
plt.show()
No description has been provided for this image
In [ ]:
 

Task 2: Create a Choropleth using plotly.express to display the number of EV vehicles based on location.

In [6]:
import plotly.express as px
In [7]:
fig = px.choropleth(df, locations='State',
                    locationmode='USA-states',
                    color='Make',
                    hover_name='Make',
                    scope='usa',
                    title='Distribution of EVs by State')
fig.show()
In [8]:
fig = px.choropleth(df, locations='State',
                    locationmode='USA-states',
                    color='Make',
                    animation_frame='Model Year',
                    scope='usa',
                    title='Year-wise Distribution of EVs by State')
fig.show()
In [9]:
Cars_year = Data.groupby(['Make', 'Model Year']).size().reset_index(name='vehicle_count')
Cars_year = Cars_year.sort_values(by='Model Year')
Cars_year = Cars_year[(Cars_year['Model Year'] >= 2016) & (Cars_year['Model Year'] <= 2023)]

fig = px.bar(
    Cars_year,
    x='vehicle_count',
    y='Make',
    color='Make',
    animation_frame='Model Year',
    orientation='h',
    title='EV Makes Count Over the Years',
    labels={'vehicle_count': 'Number of Vehicles'},
    range_x=[0, Cars_year['vehicle_count'].max() + 50]
)

fig.update_layout(
    xaxis=dict(showgrid=True, gridcolor='LightGray', tickformat=',d'),
    yaxis_title='Make',
    xaxis_title='Number of Vehicles',
    showlegend=False,
    title_x=0.5,
    margin=dict(l=50, r=50, t=50, b=50),
    width=800,
    height=600
)
fig.show()
In [ ]:
import bar_chart_race as bcr
In [ ]:
ev_counts = Data.groupby(['Model Year', 'Make']).size().unstack(fill_value=0)

bcr.bar_chart_race(
    df=ev_counts,
    filename="ev_racing_bar.mp4",
    orientation="h",
    sort="desc",
    n_bars=10,
    steps_per_period=50,
    period_length=2000,
    title={'label': 'Year-wise Sales of EV Brands', 'size': 32},
    bar_label_font={'size': 18},
    tick_label_font={'size': 14},
    fig_kwargs={'figsize': (16, 9), 'dpi': 100},
)
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]: